This notebook compares how fast simple seeding queries run when they are executed directly against the edge store versus loading the networks into memory and then operating on them with NetworkX.
In [32]:
import sys
import time

import pandas as pd
from pybel import BELGraph, Manager
from pybel.constants import VERSION
from pybel.manager.models import Annotation, AnnotationEntry, Edge, edge_annotation, network_edge
from pybel.struct.summary import count_functions
from sqlalchemy import and_
In [2]:
# Record the Python interpreter version used for this run.
print(sys.version)
In [3]:
# Record the local time at which the notebook was executed.
print(time.asctime())
In [4]:
# Record the value of pybel.constants.VERSION used for this run.
print(VERSION)
In [5]:
# Create a Manager with no arguments; `m` is the handle used by every query cell below.
m = Manager()
# Bare expression: shows the manager's repr as the cell output.
m
Out[5]:
In [6]:
# Tabulate id, name and version of every network returned by list_recent_networks().
pd.DataFrame([
    {'id': network.id, 'name': network.name, 'version': network.version}
    for network in m.list_recent_networks()
])
Out[6]:
In [ ]:
def print_summary(graph):
    """Print the sorted items of ``count_functions(graph)``, one per line.

    :param graph: The graph to summarize; passed straight to ``count_functions``.
    """
    # Summarize the argument itself rather than the notebook-level
    # ``graph_by_network_id``, so each call reports on the graph it was given
    # and the function also works on a fresh kernel.
    print(*sorted(count_functions(graph).items()), sep='\n')
In [16]:
%%time
# Baseline timing: fetch network 11 through the manager's get_graph_by_id, then summarize it.
graph_by_network_id = m.get_graph_by_id(11)
print_summary(graph_by_network_id)
In [8]:
def get_graph_by_network_edges(manager, network_id, **kwargs):
    """Assemble a BELGraph from the edges stored for a single network.

    :param manager: Object providing ``get_network_by_id``
    :param network_id: Identifier passed to ``manager.get_network_by_id``
    :param kwargs: Forwarded unchanged to the ``BELGraph`` constructor
    :return: The graph after every edge of the network has inserted itself into it
    """
    stored_edges = manager.get_network_by_id(network_id).edges
    result = BELGraph(**kwargs)
    for stored_edge in stored_edges:
        # Each edge object knows how to add itself to the graph.
        stored_edge.insert_into_graph(result)
    return result
In [9]:
%%time
# Same network (id 11), rebuilt edge by edge with get_graph_by_network_edges, timed for comparison.
graph_by_edges = get_graph_by_network_edges(m, 11)
print_summary(graph_by_edges)
This query works, but needs serious optimization to be generally useful, especially since this kind of query automatically eliminates the need to do in-memory graph join operations.
In [19]:
# Identifiers of the networks combined in the multi-network timing cells below.
network_ids = [10, 2, 9]
In [31]:
%%time
# Baseline timing for several networks: let the manager build the graph via get_graph_by_ids.
graph_by_network_ids = m.get_graph_by_ids(network_ids)
print_summary(graph_by_network_ids)
In [29]:
def get_graph_by_networks_edges(manager, network_ids, **kwargs):
    """Assemble one BELGraph from the edges of several networks using a single query.

    :param manager: Object exposing a SQLAlchemy ``session``
    :param network_ids: Network identifiers matched against ``network_edge.c.network_id``
    :param kwargs: Forwarded unchanged to the ``BELGraph`` constructor
    :return: The graph after every selected edge has inserted itself into it
    """
    # Join edges to the network/edge association table, then keep only the rows
    # whose network is one of the requested ones.
    edge_query = manager.session.query(Edge).join(network_edge)
    in_requested_networks = network_edge.c.network_id.in_(network_ids)
    edge_query = edge_query.filter(in_requested_networks)

    merged = BELGraph(**kwargs)
    for stored_edge in edge_query:
        stored_edge.insert_into_graph(merged)
    return merged
In [30]:
%%time
# Same networks, built from one edge query with get_graph_by_networks_edges, timed for comparison.
graph_by_networks_edges = get_graph_by_networks_edges(m, network_ids)
print_summary(graph_by_networks_edges)
In [ ]:
# TODO: unfinished cell. The original held only the dangling assignment `graph =`,
# which is a SyntaxError and stops Restart & Run All; assign a graph here once decided.
In [ ]:
def get_graph_by_annotation(manager, keyword, value, network_ids=None, **kwargs):
    """Assemble a BELGraph from the stored edges that carry a given annotation entry.

    :param manager: Object exposing a SQLAlchemy ``session``
    :param keyword: Keyword of the annotation to match
    :param value: Name of the entry within that annotation
    :param network_ids: Optional network identifiers; when given, only edges linked to
        one of these networks are used. ``None`` applies no network restriction.
    :param kwargs: Forwarded unchanged to the ``BELGraph`` constructor
    :return: The graph after every matching edge has inserted itself into it

    NOTE(review): this assumes ``Annotation.keyword`` and ``AnnotationEntry.name`` are
    the columns holding the keyword and the entry value, and that SQLAlchemy can infer
    each join from the tables' foreign keys — confirm against ``pybel.manager.models``.
    """
    edges = manager.session.query(Edge)

    if network_ids is not None:
        # The network restriction comes from an argument, not a notebook-level
        # variable, so the function does not depend on hidden kernel state.
        edges = edges.join(network_edge).filter(network_edge.c.network_id.in_(network_ids))

    # Walk edge -> association table -> annotation entry -> annotation, then match
    # both the annotation keyword and the entry value.
    edges = (
        edges
        .join(edge_annotation)
        .join(AnnotationEntry)
        .join(Annotation)
        .filter(and_(Annotation.keyword == keyword, AnnotationEntry.name == value))
    )

    graph = BELGraph(**kwargs)
    for edge in edges:
        edge.insert_into_graph(graph)
    return graph
In [ ]:
# NOTE(review): this binds the manager's get_annotation_entry method itself to
# `annotation`; the method is not called here, so no entry is looked up.
annotation = m.get_annotation_entry